import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Load the insurance data set from CSV.
df = pd.read_csv('insurance.csv')

# Encode the categorical columns (sex, smoker, region) as integers.
# Series.map() silently converts any label that is missing from the dictionary
# into NaN, and DataFrame.corr() then ignores those cells, so a misspelled or
# differently-cased label would distort the results without any warning.
# Unexpected labels are therefore rejected up front.
CATEGORY_CODES = {
    'sex': {'female': 0, 'male': 1},
    'smoker': {'yes': 1, 'no': 0},
    'region': {'southwest': 0, 'southeast': 1, 'northwest': 2, 'northeast': 3},
}
for column, codes in CATEGORY_CODES.items():
    encoded = df[column].map(codes)
    # Labels that were present in the file but have no code. Cells that were
    # already missing in the file stay NaN, exactly as before.
    unmapped = df.loc[encoded.isna() & df[column].notna(), column].unique()
    if len(unmapped) > 0:
        raise ValueError(
            f"Column {column!r} contains unexpected values: "
            f"{sorted(map(str, unmapped))}"
        )
    df[column] = encoded

# The sections below visualize the encoded data.

# Heat map of the correlation matrix, with each cell annotated by its value.
corr = df.corr()
_, heatmap_ax = plt.subplots(figsize=(10, 10))
sns.heatmap(
    corr,
    ax=heatmap_ax,
    cmap='RdYlGn',
    annot=True,
    square=True,
    vmax=1,
)
heatmap_ax.set_title('Correlation matrix of the data')
plt.show()

# Scatter-plot matrix (pairwise relationships between the DataFrame's variables).
pair_grid = sns.pairplot(data=df)
plt.show()

# Box plot comparing the distribution of charges across the smoker codes.
_, box_ax = plt.subplots(figsize=(8, 5))
sns.boxplot(data=df, x="smoker", y="charges", ax=box_ax)
box_ax.set_title("Box plot of charges for smokers and non-smokers")
plt.show()

# Correlation matrix rendered again, this time with the 'Blues' colormap.
feature_corr = df.corr()
_, corr_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(feature_corr, cmap='Blues', annot=True, ax=corr_ax)
corr_ax.set_title("Correlation Matrix of Features")
plt.show()